{
 "cells": [
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "c261e5f4-17a8-40da-beb9-599f1717e0fe",
   "metadata": {},
   "source": [
    "### 1. 安装HuggingFace 并下载模型到本地"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "02785614-9268-41c8-85a5-d579490edbbf",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
      "pytest-astropy 0.8.0 requires pytest-cov>=2.0, which is not installed.\n",
      "pytest-astropy 0.8.0 requires pytest-filter-subpackage>=0.1, which is not installed.\n",
      "spyder 4.0.1 requires pyqt5<5.13; python_version >= \"3\", which is not installed.\n",
      "spyder 4.0.1 requires pyqtwebengine<5.13; python_version >= \"3\", which is not installed.\n",
      "sagemaker 2.165.0 requires importlib-metadata<5.0,>=1.4.0, but you have importlib-metadata 6.6.0 which is incompatible.\n",
      "sparkmagic 0.20.4 requires nest-asyncio==1.5.5, but you have nest-asyncio 1.5.6 which is incompatible.\n",
      "spyder 4.0.1 requires jedi==0.14.1, but you have jedi 0.18.2 which is incompatible.\u001b[0m\u001b[31m\n",
      "\u001b[0mRequirement already satisfied: sagemaker in /opt/conda/lib/python3.7/site-packages (2.165.0)\n",
      "Collecting sagemaker\n",
      "  Downloading sagemaker-2.170.0.tar.gz (852 kB)\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m852.4/852.4 kB\u001b[0m \u001b[31m8.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m\n",
      "\u001b[?25h  Preparing metadata (setup.py) ... \u001b[?25ldone\n",
      "\u001b[?25hRequirement already satisfied: attrs<24,>=23.1.0 in /opt/conda/lib/python3.7/site-packages (from sagemaker) (23.1.0)\n",
      "Requirement already satisfied: boto3<2.0,>=1.26.131 in /opt/conda/lib/python3.7/site-packages (from sagemaker) (1.26.154)\n",
      "Requirement already satisfied: cloudpickle==2.2.1 in /opt/conda/lib/python3.7/site-packages (from sagemaker) (2.2.1)\n",
      "Requirement already satisfied: google-pasta in /opt/conda/lib/python3.7/site-packages (from sagemaker) (0.2.0)\n",
      "Requirement already satisfied: numpy<2.0,>=1.9.0 in /opt/conda/lib/python3.7/site-packages (from sagemaker) (1.21.6)\n",
      "Requirement already satisfied: protobuf<4.0,>=3.1 in /opt/conda/lib/python3.7/site-packages (from sagemaker) (3.20.3)\n",
      "Requirement already satisfied: protobuf3-to-dict<1.0,>=0.1.5 in /opt/conda/lib/python3.7/site-packages (from sagemaker) (0.1.5)\n",
      "Requirement already satisfied: smdebug_rulesconfig==1.0.1 in /opt/conda/lib/python3.7/site-packages (from sagemaker) (1.0.1)\n",
      "Collecting importlib-metadata<5.0,>=1.4.0 (from sagemaker)\n",
      "  Downloading importlib_metadata-4.13.0-py3-none-any.whl (23 kB)\n",
      "Requirement already satisfied: packaging>=20.0 in /opt/conda/lib/python3.7/site-packages (from sagemaker) (23.1)\n",
      "Requirement already satisfied: pandas in /opt/conda/lib/python3.7/site-packages (from sagemaker) (1.3.5)\n",
      "Requirement already satisfied: pathos in /opt/conda/lib/python3.7/site-packages (from sagemaker) (0.3.0)\n",
      "Requirement already satisfied: schema in /opt/conda/lib/python3.7/site-packages (from sagemaker) (0.7.5)\n",
      "Requirement already satisfied: PyYAML==6.0 in /opt/conda/lib/python3.7/site-packages (from sagemaker) (6.0)\n",
      "Requirement already satisfied: jsonschema in /opt/conda/lib/python3.7/site-packages (from sagemaker) (3.2.0)\n",
      "Requirement already satisfied: platformdirs in /opt/conda/lib/python3.7/site-packages (from sagemaker) (3.5.3)\n",
      "Requirement already satisfied: tblib==1.7.0 in /opt/conda/lib/python3.7/site-packages (from sagemaker) (1.7.0)\n",
      "Requirement already satisfied: botocore<1.30.0,>=1.29.154 in /opt/conda/lib/python3.7/site-packages (from boto3<2.0,>=1.26.131->sagemaker) (1.29.154)\n",
      "Requirement already satisfied: jmespath<2.0.0,>=0.7.1 in /opt/conda/lib/python3.7/site-packages (from boto3<2.0,>=1.26.131->sagemaker) (1.0.1)\n",
      "Requirement already satisfied: s3transfer<0.7.0,>=0.6.0 in /opt/conda/lib/python3.7/site-packages (from boto3<2.0,>=1.26.131->sagemaker) (0.6.1)\n",
      "Requirement already satisfied: zipp>=0.5 in /opt/conda/lib/python3.7/site-packages (from importlib-metadata<5.0,>=1.4.0->sagemaker) (2.2.0)\n",
      "Requirement already satisfied: typing-extensions>=3.6.4 in /opt/conda/lib/python3.7/site-packages (from importlib-metadata<5.0,>=1.4.0->sagemaker) (4.6.3)\n",
      "Requirement already satisfied: six in /opt/conda/lib/python3.7/site-packages (from protobuf3-to-dict<1.0,>=0.1.5->sagemaker) (1.14.0)\n",
      "Requirement already satisfied: pyrsistent>=0.14.0 in /opt/conda/lib/python3.7/site-packages (from jsonschema->sagemaker) (0.15.7)\n",
      "Requirement already satisfied: setuptools in /opt/conda/lib/python3.7/site-packages (from jsonschema->sagemaker) (65.5.1)\n",
      "Requirement already satisfied: python-dateutil>=2.7.3 in /opt/conda/lib/python3.7/site-packages (from pandas->sagemaker) (2.8.2)\n",
      "Requirement already satisfied: pytz>=2017.3 in /opt/conda/lib/python3.7/site-packages (from pandas->sagemaker) (2019.3)\n",
      "Requirement already satisfied: ppft>=1.7.6.6 in /opt/conda/lib/python3.7/site-packages (from pathos->sagemaker) (1.7.6.6)\n",
      "Requirement already satisfied: dill>=0.3.6 in /opt/conda/lib/python3.7/site-packages (from pathos->sagemaker) (0.3.6)\n",
      "Requirement already satisfied: pox>=0.3.2 in /opt/conda/lib/python3.7/site-packages (from pathos->sagemaker) (0.3.2)\n",
      "Requirement already satisfied: multiprocess>=0.70.14 in /opt/conda/lib/python3.7/site-packages (from pathos->sagemaker) (0.70.14)\n",
      "Requirement already satisfied: contextlib2>=0.5.5 in /opt/conda/lib/python3.7/site-packages (from schema->sagemaker) (0.6.0.post1)\n",
      "Collecting urllib3<1.27,>=1.25.4 (from botocore<1.30.0,>=1.29.154->boto3<2.0,>=1.26.131->sagemaker)\n",
      "  Downloading urllib3-1.26.16-py2.py3-none-any.whl (143 kB)\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m143.1/143.1 kB\u001b[0m \u001b[31m2.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n",
      "\u001b[?25hBuilding wheels for collected packages: sagemaker\n",
      "  Building wheel for sagemaker (setup.py) ... \u001b[?25ldone\n",
      "\u001b[?25h  Created wheel for sagemaker: filename=sagemaker-2.170.0-py2.py3-none-any.whl size=1158896 sha256=ae04a840c972e4b1ea3b2eec722551437dfaab93df0d5670adebc6f07944ffe6\n",
      "  Stored in directory: /root/.cache/pip/wheels/63/9b/cb/2be55b36e552867eafb5f710ff4f2a87dbf99288f268857e54\n",
      "Successfully built sagemaker\n",
      "Installing collected packages: urllib3, importlib-metadata, sagemaker\n",
      "  Attempting uninstall: urllib3\n",
      "    Found existing installation: urllib3 2.0.3\n",
      "    Uninstalling urllib3-2.0.3:\n",
      "      Successfully uninstalled urllib3-2.0.3\n",
      "  Attempting uninstall: importlib-metadata\n",
      "    Found existing installation: importlib-metadata 6.6.0\n",
      "    Uninstalling importlib-metadata-6.6.0:\n",
      "      Successfully uninstalled importlib-metadata-6.6.0\n",
      "  Attempting uninstall: sagemaker\n",
      "    Found existing installation: sagemaker 2.165.0\n",
      "    Uninstalling sagemaker-2.165.0:\n",
      "      Successfully uninstalled sagemaker-2.165.0\n",
      "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
      "pytest-astropy 0.8.0 requires pytest-cov>=2.0, which is not installed.\n",
      "pytest-astropy 0.8.0 requires pytest-filter-subpackage>=0.1, which is not installed.\n",
      "spyder 4.0.1 requires pyqt5<5.13; python_version >= \"3\", which is not installed.\n",
      "spyder 4.0.1 requires pyqtwebengine<5.13; python_version >= \"3\", which is not installed.\n",
      "python-language-server 0.31.7 requires jedi<0.16,>=0.14.1, but you have jedi 0.18.2 which is incompatible.\n",
      "python-language-server 0.31.7 requires ujson<=1.35; platform_system != \"Windows\", but you have ujson 5.7.0 which is incompatible.\n",
      "sparkmagic 0.20.4 requires nest-asyncio==1.5.5, but you have nest-asyncio 1.5.6 which is incompatible.\n",
      "spyder 4.0.1 requires jedi==0.14.1, but you have jedi 0.18.2 which is incompatible.\u001b[0m\u001b[31m\n",
      "\u001b[0mSuccessfully installed importlib-metadata-4.13.0 sagemaker-2.170.0 urllib3-1.26.16\n",
      "\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n",
      "\u001b[0m"
     ]
    }
   ],
   "source": [
    "!pip install huggingface-hub -Uqq\n",
    "!pip install -U sagemaker"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "0d846ed0-24cb-45d3-be44-42a28bf5ce72",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n",
      "\u001b[0mNote: you may need to restart the kernel to use updated packages.\n",
      "Processing ./code/botocore-1.29.157-py3-none-any.whl\n",
      "Processing ./code/boto3-1.26.157-py3-none-any.whl\n",
      "Collecting jmespath<2.0.0,>=0.7.1 (from botocore==1.29.157)\n",
      "  Downloading jmespath-1.0.1-py3-none-any.whl (20 kB)\n",
      "Collecting python-dateutil<3.0.0,>=2.1 (from botocore==1.29.157)\n",
      "  Downloading python_dateutil-2.8.2-py2.py3-none-any.whl (247 kB)\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m247.7/247.7 kB\u001b[0m \u001b[31m3.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m\n",
      "\u001b[?25hCollecting urllib3<1.27,>=1.25.4 (from botocore==1.29.157)\n",
      "  Using cached urllib3-1.26.16-py2.py3-none-any.whl (143 kB)\n",
      "Collecting s3transfer<0.7.0,>=0.6.0 (from boto3==1.26.157)\n",
      "  Downloading s3transfer-0.6.1-py3-none-any.whl (79 kB)\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m79.8/79.8 kB\u001b[0m \u001b[31m1.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0mta \u001b[36m0:00:01\u001b[0m\n",
      "\u001b[?25hCollecting six>=1.5 (from python-dateutil<3.0.0,>=2.1->botocore==1.29.157)\n",
      "  Downloading six-1.16.0-py2.py3-none-any.whl (11 kB)\n",
      "Installing collected packages: urllib3, six, jmespath, python-dateutil, botocore, s3transfer, boto3\n",
      "  Attempting uninstall: urllib3\n",
      "    Found existing installation: urllib3 1.26.16\n",
      "    Uninstalling urllib3-1.26.16:\n",
      "      Successfully uninstalled urllib3-1.26.16\n",
      "  Attempting uninstall: six\n",
      "    Found existing installation: six 1.14.0\n",
      "    Uninstalling six-1.14.0:\n",
      "      Successfully uninstalled six-1.14.0\n",
      "  Attempting uninstall: jmespath\n",
      "    Found existing installation: jmespath 1.0.1\n",
      "    Uninstalling jmespath-1.0.1:\n",
      "      Successfully uninstalled jmespath-1.0.1\n",
      "  Attempting uninstall: python-dateutil\n",
      "    Found existing installation: python-dateutil 2.8.2\n",
      "    Uninstalling python-dateutil-2.8.2:\n",
      "      Successfully uninstalled python-dateutil-2.8.2\n",
      "  Attempting uninstall: botocore\n",
      "    Found existing installation: botocore 1.29.154\n",
      "    Uninstalling botocore-1.29.154:\n",
      "      Successfully uninstalled botocore-1.29.154\n",
      "  Attempting uninstall: s3transfer\n",
      "    Found existing installation: s3transfer 0.6.1\n",
      "    Uninstalling s3transfer-0.6.1:\n",
      "      Successfully uninstalled s3transfer-0.6.1\n",
      "  Attempting uninstall: boto3\n",
      "    Found existing installation: boto3 1.26.154\n",
      "    Uninstalling boto3-1.26.154:\n",
      "      Successfully uninstalled boto3-1.26.154\n",
      "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
      "pytest-astropy 0.8.0 requires pytest-cov>=2.0, which is not installed.\n",
      "pytest-astropy 0.8.0 requires pytest-filter-subpackage>=0.1, which is not installed.\n",
      "spyder 4.0.1 requires pyqt5<5.13; python_version >= \"3\", which is not installed.\n",
      "spyder 4.0.1 requires pyqtwebengine<5.13; python_version >= \"3\", which is not installed.\n",
      "aiobotocore 2.4.2 requires botocore<1.27.60,>=1.27.59, but you have botocore 1.29.157 which is incompatible.\n",
      "awscli 1.27.154 requires botocore==1.29.154, but you have botocore 1.29.157 which is incompatible.\n",
      "awscli 1.27.154 requires PyYAML<5.5,>=3.10, but you have pyyaml 6.0 which is incompatible.\n",
      "awscli 1.27.154 requires rsa<4.8,>=3.1.2, but you have rsa 4.9 which is incompatible.\n",
      "sagemaker-datawrangler 0.4.3 requires sagemaker-data-insights==0.4.0, but you have sagemaker-data-insights 0.3.3 which is incompatible.\n",
      "sparkmagic 0.20.4 requires nest-asyncio==1.5.5, but you have nest-asyncio 1.5.6 which is incompatible.\n",
      "spyder 4.0.1 requires jedi==0.14.1, but you have jedi 0.18.2 which is incompatible.\u001b[0m\u001b[31m\n",
      "\u001b[0mSuccessfully installed boto3-1.26.157 botocore-1.29.157 jmespath-1.0.1 python-dateutil-2.8.2 s3transfer-0.6.1 six-1.16.0 urllib3-1.26.16\n",
      "\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n",
      "\u001b[0mNote: you may need to restart the kernel to use updated packages.\n"
     ]
    }
   ],
   "source": [
    "%pip install sagemaker pip --upgrade  --quiet\n",
    "# Note the following may error depending on which awscli is installed in your jupyter kernel, \n",
    "# but that is ok \n",
    "%pip install ./code/botocore-1.29.157-py3-none-any.whl ./code/boto3-1.26.157-py3-none-any.whl --force"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "f6daddee-833f-4755-9541-b5e1ea675024",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "!rm -rf ./LLM_chatglm2_stream_model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "9e6bd7ee-16a3-4f5a-8857-8bbba83eb9e7",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "from huggingface_hub import snapshot_download\n",
    "from pathlib import Path\n",
    "\n",
    "local_model_path = Path(\"./LLM_chatglm2_stream_model\")\n",
    "local_model_path.mkdir(exist_ok=True)\n",
    "model_name = \"THUDM/chatglm2-6b\"\n",
    "commit_hash = \"b1502f4f75c71499a3d566b14463edd62620ce9f\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "94e8abc5-a58e-40e2-b1e6-fbf48307c716",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "08337ee153ca4e79a804439a2a55460b",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, description='Fetching 18 files', max=18.0, style=ProgressStyle(descrip…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "ad13b69710ff476b83f875cb6c49906f",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, description='Downloading (…)d62620ce9f/README.md', max=8079.0, style=P…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "cc43b3a526634f438fd617f874134683",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, description='Downloading (…)iguration_chatglm.py', max=2246.0, style=P…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "25cba0220ed040ae9df08099734d574c",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, description='Downloading (…)2620ce9f/config.json', max=1223.0, style=P…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "333be0787b114b4ea9e01388e97e64f5",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, description='Downloading (…)/modeling_chatglm.py', max=50717.0, style=…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "f047053a24cf45a1a7842e41ed0d864f",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, description='Downloading (…)0ce9f/.gitattributes', max=1519.0, style=P…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "1da882883ed04f72afbb4df0d1cd4ac8",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, description='Downloading (…)20ce9f/MODEL_LICENSE', max=4133.0, style=P…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "\n",
      "\n",
      "\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "d6edec198dfb4fc9b73f492b91a6f8d3",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, description='Downloading (…)l-00002-of-00007.bin', max=1968299005.0, s…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "a34f4ac19f1e4693a5b70d2d86359522",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, description='Downloading (…)l-00001-of-00007.bin', max=1827780615.0, s…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "90ec2134d7894ea1bcadb35302aa0962",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, description='Downloading (…)l-00003-of-00007.bin', max=1927414561.0, s…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "f7a2c0330e79438caaae6de208f8f5f9",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, description='Downloading (…)l-00004-of-00007.bin', max=1815225523.0, s…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "35275d5f3acc4382881c95ab1e814825",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, description='Downloading (…)l-00007-of-00007.bin', max=1052808067.0, s…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "fd395f678a22422e839441ac0999bcea",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, description='Downloading (…)l-00006-of-00007.bin', max=1927414561.0, s…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "2eab76b3586d476587994c9ffbc8dc71",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, description='Downloading (…)l-00005-of-00007.bin', max=1968299069.0, s…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "8938db5abc13472ebf5dfd09b0035b1b",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, description='Downloading (…)model.bin.index.json', max=20438.0, style=…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "2ca6f56701154a9e8f023ef27469c09f",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, description='Downloading (…)ce9f/quantization.py', max=14692.0, style=…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "98ed6887803448daacdc016555f6dff4",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, description='Downloading (…)enization_chatglm.py', max=10061.0, style=…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "16ce9cf35e94499c98bb1da769573715",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, description='Downloading tokenizer.model', max=1018370.0, style=Progre…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "64f211b439074a8b99a44c1149284003",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, description='Downloading (…)okenizer_config.json', max=244.0, style=Pr…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "'LLM_chatglm2_stream_model/models--THUDM--chatglm2-6b/snapshots/b1502f4f75c71499a3d566b14463edd62620ce9f'"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "snapshot_download(repo_id=model_name, cache_dir=local_model_path)"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "8d666c79-b039-4258-ac3b-46b19e63c3b8",
   "metadata": {},
   "source": [
    "### 2. 把模型拷贝到S3为后续部署做准备"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "e9431deb-6359-442d-847b-1563f8dd3854",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "import sagemaker\n",
    "from sagemaker import image_uris\n",
    "import boto3\n",
    "import os\n",
    "import time\n",
    "import json\n",
    "\n",
    "role = sagemaker.get_execution_role()  # execution role for the endpoint\n",
    "sess = sagemaker.session.Session()  # sagemaker session for interacting with different AWS APIs\n",
    "bucket = sess.default_bucket()  # bucket to house artifacts\n",
    "\n",
    "region = sess._region_name\n",
    "account_id = sess.account_id()\n",
    "\n",
    "s3_client = boto3.client(\"s3\")\n",
    "sm_client = boto3.client(\"sagemaker\")\n",
    "smr_client = boto3.client(\"sagemaker-runtime\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "40dd8f16-ae7c-48bf-8e52-1a15425fa74d",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "s3_code_prefix: LLM-RAG/workshop/LLM_chatglm2_stream_deploy_code\n",
      "model_snapshot_path: LLM_chatglm2_stream_model/models--THUDM--chatglm2-6b/snapshots/b1502f4f75c71499a3d566b14463edd62620ce9f\n"
     ]
    }
   ],
   "source": [
    "s3_model_prefix = \"LLM-RAG/workshop/LLM_chatglm2_stream_model\"  # folder where model checkpoint will go\n",
    "model_snapshot_path = list(local_model_path.glob(\"**/snapshots/*\"))[0]\n",
    "s3_code_prefix = \"LLM-RAG/workshop/LLM_chatglm2_stream_deploy_code\"\n",
    "print(f\"s3_code_prefix: {s3_code_prefix}\")\n",
    "print(f\"model_snapshot_path: {model_snapshot_path}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "067292c9-c066-4649-a61f-b460a24da584",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "upload: LLM_chatglm2_stream_model/models--THUDM--chatglm2-6b/snapshots/b1502f4f75c71499a3d566b14463edd62620ce9f/.gitattributes to s3://sagemaker-us-east-2-946277762357/LLM-RAG/workshop/LLM_chatglm2_stream_model/.gitattributes\n",
      "upload: LLM_chatglm2_stream_model/models--THUDM--chatglm2-6b/snapshots/b1502f4f75c71499a3d566b14463edd62620ce9f/README.md to s3://sagemaker-us-east-2-946277762357/LLM-RAG/workshop/LLM_chatglm2_stream_model/README.md\n",
      "upload: LLM_chatglm2_stream_model/models--THUDM--chatglm2-6b/snapshots/b1502f4f75c71499a3d566b14463edd62620ce9f/configuration_chatglm.py to s3://sagemaker-us-east-2-946277762357/LLM-RAG/workshop/LLM_chatglm2_stream_model/configuration_chatglm.py\n",
      "upload: LLM_chatglm2_stream_model/models--THUDM--chatglm2-6b/snapshots/b1502f4f75c71499a3d566b14463edd62620ce9f/config.json to s3://sagemaker-us-east-2-946277762357/LLM-RAG/workshop/LLM_chatglm2_stream_model/config.json\n",
      "upload: LLM_chatglm2_stream_model/models--THUDM--chatglm2-6b/snapshots/b1502f4f75c71499a3d566b14463edd62620ce9f/modeling_chatglm.py to s3://sagemaker-us-east-2-946277762357/LLM-RAG/workshop/LLM_chatglm2_stream_model/modeling_chatglm.py\n",
      "upload: LLM_chatglm2_stream_model/models--THUDM--chatglm2-6b/snapshots/b1502f4f75c71499a3d566b14463edd62620ce9f/MODEL_LICENSE to s3://sagemaker-us-east-2-946277762357/LLM-RAG/workshop/LLM_chatglm2_stream_model/MODEL_LICENSE\n",
      "upload: LLM_chatglm2_stream_model/models--THUDM--chatglm2-6b/snapshots/b1502f4f75c71499a3d566b14463edd62620ce9f/pytorch_model-00004-of-00007.bin to s3://sagemaker-us-east-2-946277762357/LLM-RAG/workshop/LLM_chatglm2_stream_model/pytorch_model-00004-of-00007.bin\n",
      "upload: LLM_chatglm2_stream_model/models--THUDM--chatglm2-6b/snapshots/b1502f4f75c71499a3d566b14463edd62620ce9f/pytorch_model-00001-of-00007.bin to s3://sagemaker-us-east-2-946277762357/LLM-RAG/workshop/LLM_chatglm2_stream_model/pytorch_model-00001-of-00007.bin\n",
      "upload: LLM_chatglm2_stream_model/models--THUDM--chatglm2-6b/snapshots/b1502f4f75c71499a3d566b14463edd62620ce9f/pytorch_model-00002-of-00007.bin to s3://sagemaker-us-east-2-946277762357/LLM-RAG/workshop/LLM_chatglm2_stream_model/pytorch_model-00002-of-00007.bin\n",
      "upload: LLM_chatglm2_stream_model/models--THUDM--chatglm2-6b/snapshots/b1502f4f75c71499a3d566b14463edd62620ce9f/pytorch_model.bin.index.json to s3://sagemaker-us-east-2-946277762357/LLM-RAG/workshop/LLM_chatglm2_stream_model/pytorch_model.bin.index.json\n",
      "upload: LLM_chatglm2_stream_model/models--THUDM--chatglm2-6b/snapshots/b1502f4f75c71499a3d566b14463edd62620ce9f/quantization.py to s3://sagemaker-us-east-2-946277762357/LLM-RAG/workshop/LLM_chatglm2_stream_model/quantization.py\n",
      "upload: LLM_chatglm2_stream_model/models--THUDM--chatglm2-6b/snapshots/b1502f4f75c71499a3d566b14463edd62620ce9f/tokenization_chatglm.py to s3://sagemaker-us-east-2-946277762357/LLM-RAG/workshop/LLM_chatglm2_stream_model/tokenization_chatglm.py\n",
      "upload: LLM_chatglm2_stream_model/models--THUDM--chatglm2-6b/snapshots/b1502f4f75c71499a3d566b14463edd62620ce9f/tokenizer.model to s3://sagemaker-us-east-2-946277762357/LLM-RAG/workshop/LLM_chatglm2_stream_model/tokenizer.model\n",
      "upload: LLM_chatglm2_stream_model/models--THUDM--chatglm2-6b/snapshots/b1502f4f75c71499a3d566b14463edd62620ce9f/tokenizer_config.json to s3://sagemaker-us-east-2-946277762357/LLM-RAG/workshop/LLM_chatglm2_stream_model/tokenizer_config.json\n",
      "upload: LLM_chatglm2_stream_model/models--THUDM--chatglm2-6b/snapshots/b1502f4f75c71499a3d566b14463edd62620ce9f/pytorch_model-00003-of-00007.bin to s3://sagemaker-us-east-2-946277762357/LLM-RAG/workshop/LLM_chatglm2_stream_model/pytorch_model-00003-of-00007.bin\n",
      "upload: LLM_chatglm2_stream_model/models--THUDM--chatglm2-6b/snapshots/b1502f4f75c71499a3d566b14463edd62620ce9f/pytorch_model-00005-of-00007.bin to s3://sagemaker-us-east-2-946277762357/LLM-RAG/workshop/LLM_chatglm2_stream_model/pytorch_model-00005-of-00007.bin\n",
      "upload: LLM_chatglm2_stream_model/models--THUDM--chatglm2-6b/snapshots/b1502f4f75c71499a3d566b14463edd62620ce9f/pytorch_model-00007-of-00007.bin to s3://sagemaker-us-east-2-946277762357/LLM-RAG/workshop/LLM_chatglm2_stream_model/pytorch_model-00007-of-00007.bin\n",
      "upload: LLM_chatglm2_stream_model/models--THUDM--chatglm2-6b/snapshots/b1502f4f75c71499a3d566b14463edd62620ce9f/pytorch_model-00006-of-00007.bin to s3://sagemaker-us-east-2-946277762357/LLM-RAG/workshop/LLM_chatglm2_stream_model/pytorch_model-00006-of-00007.bin\n"
     ]
    }
   ],
   "source": [
    "!aws s3 cp --recursive {model_snapshot_path} s3://{bucket}/{s3_model_prefix}"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "696b70c3-90f1-4175-95bf-568bafbcd383",
   "metadata": {},
   "source": [
    "### 3. 模型部署准备（entrypoint脚本，容器镜像，服务配置）"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "68d60730-528b-4707-9189-6bf6f5cad754",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Image going to be used is ---- > 763104351884.dkr.ecr.us-east-2.amazonaws.com/djl-inference:0.22.1-deepspeed0.9.2-cu118\n"
     ]
    }
   ],
   "source": [
    "inference_image_uri = (\n",
    "    f\"763104351884.dkr.ecr.{region}.amazonaws.com/djl-inference:0.23.0-deepspeed0.9.5-cu118\"\n",
    ")\n",
    "\n",
    "#中国区需要替换为下面的image_uri\n",
    "# inference_image_uri = (\n",
    "#     f\"727897471807.dkr.ecr.{region}.amazonaws.com.cn/djl-inference:0.21.0-deepspeed0.8.3-cu117\"\n",
    "# )\n",
    "\n",
    "print(f\"Image going to be used is ---- > {inference_image_uri}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "8d771bdb-11d2-45d2-9bef-face29221838",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "!mkdir -p LLM_chatglm2_stream_deploy_code"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "id": "e5348ecb-43df-4094-97d8-a6723004862a",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Overwriting LLM_chatglm2_stream_deploy_code/model.py\n"
     ]
    }
   ],
   "source": [
    "%%writefile LLM_chatglm2_stream_deploy_code/model.py\n",
    "from djl_python import Input, Output\n",
    "import torch\n",
    "import logging\n",
    "import math\n",
    "import os\n",
    "\n",
    "from transformers import pipeline, AutoModel, AutoTokenizer\n",
    "model = None\n",
    "tokenizer = None\n",
    "STOP_flag = \"[DONE]\"\n",
    "\n",
    "DEVICE = \"cuda\"\n",
    "DEVICE_ID = \"0\"\n",
    "CUDA_DEVICE = f\"{DEVICE}:{DEVICE_ID}\" if DEVICE_ID else DEVICE\n",
    "def torch_gc():\n",
    "    if torch.cuda.is_available():\n",
    "        with torch.cuda.device(CUDA_DEVICE):\n",
    "            torch.cuda.empty_cache()\n",
    "            torch.cuda.ipc_collect()\n",
    "            \n",
    "def load_model(properties):\n",
    "    global tokenizer,model\n",
    "    tensor_parallel = properties[\"tensor_parallel_degree\"]\n",
    "    model_location = properties['model_dir']\n",
    "    if \"model_id\" in properties:\n",
    "        model_location = properties['model_id']\n",
    "    logging.info(f\"Loading model in {model_location}\")\n",
    "    \n",
    "    tokenizer = AutoTokenizer.from_pretrained(model_location, trust_remote_code=True)\n",
    "   \n",
    "    model = AutoModel.from_pretrained(model_location, trust_remote_code=True).half().cuda()\n",
    "    \n",
    "    model.requires_grad_(False)\n",
    "    model.eval()\n",
    "    \n",
    "    return model, tokenizer\n",
    "\n",
    "\n",
    "\n",
    "def stream_items(prompt, history, max_length, top_p, temperature):\n",
    "    global model, tokenizer\n",
    "    size = 0\n",
    "    response = \"\"\n",
    "    for response, history in model.stream_chat(tokenizer, prompt, history=history, max_length=max_length, top_p=top_p,\n",
    "                                               temperature=temperature):\n",
    "        this_response = response[size:]\n",
    "        history = [list(h) for h in history]\n",
    "        size = len(response)\n",
    "        stream_buffer = { \"outputs\":this_response}\n",
    "        yield stream_buffer\n",
    "    ## stop\n",
    "    # yield {\"query\": prompt, \"outputs\": STOP_flag, \"response\": response, \"history\": history, \"finished\": True}\n",
    "    \n",
    "\n",
    "\n",
    "def handle(inputs: Input):\n",
    "    global model, tokenizer\n",
    "    if not model:\n",
    "        model, tokenizer = load_model(inputs.get_properties())\n",
    "\n",
    "    if inputs.is_empty():\n",
    "        return None\n",
    "    data = inputs.get_as_json()\n",
    "    \n",
    "    input_sentences = data[\"inputs\"]\n",
    "    params = data[\"parameters\"]\n",
    "    history = data[\"history\"]\n",
    "    print(f'input prompt:{input_sentences}')    \n",
    "    outputs = Output()\n",
    "    outputs.add_property(\"content-type\", \"application/jsonlines\")\n",
    "    outputs.add_stream_content(stream_items(input_sentences,history=history,**params))\n",
    "    return outputs\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "id": "60a9a063-43ed-4bf2-9f51-200bca51d477",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "option.s3url ==> s3://sagemaker-us-east-2-946277762357/LLM-RAG/workshop/LLM_chatglm2_stream_model/\n"
     ]
    }
   ],
   "source": [
    "print(f\"option.s3url ==> s3://{bucket}/{s3_model_prefix}/\")"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "a06d1e60-3914-4059-a08f-05ac26761165",
   "metadata": {},
   "source": [
    "#### Note: option.s3url 需要按照自己的账号进行修改, 可以拷贝上一个cell的输出"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "id": "8996fe44-8e70-468b-abc1-38187cb33f4f",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Overwriting LLM_chatglm2_stream_deploy_code/serving.properties\n"
     ]
    }
   ],
   "source": [
    "%%writefile LLM_chatglm2_stream_deploy_code/serving.properties\n",
    "engine=Python\n",
    "option.tensor_parallel_degree=1\n",
    "option.enable_streaming=True\n",
    "option.predict_timeout=240\n",
    "option.s3url = s3://sagemaker-us-east-2-946277762357/LLM-RAG/workshop/LLM_chatglm2_stream_model/"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "feef22a2-27b9-4018-a46b-6a99b532512f",
   "metadata": {},
   "source": [
    "#### 注意: 必须把transformers升级到4.27.1以上，否则会出现 [Issue344](https://github.com/THUDM/ChatGLM-6B/issues/344)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "id": "7b7e76c6-6dbc-47fc-9f47-4765c526ab76",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Overwriting LLM_chatglm2_stream_deploy_code/requirements.txt\n"
     ]
    }
   ],
   "source": [
    "%%writefile LLM_chatglm2_stream_deploy_code/requirements.txt\n",
    "transformers==4.30.2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "id": "0ae6734a-aacd-410d-818d-0a962697c3c4",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "LLM_chatglm2_stream_deploy_code/\n",
      "LLM_chatglm2_stream_deploy_code/model.py\n",
      "LLM_chatglm2_stream_deploy_code/requirements.txt\n",
      "LLM_chatglm2_stream_deploy_code/serving.properties\n"
     ]
    }
   ],
   "source": [
    "!rm model.tar.gz\n",
    "!cd LLM_chatglm2_stream_deploy_code && rm -rf \".ipynb_checkpoints\"\n",
    "!tar czvf model.tar.gz LLM_chatglm2_stream_deploy_code"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "id": "0f77dc76-6d8c-4665-ba88-f03e887c136c",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "S3 Code or Model tar ball uploaded to --- > s3://sagemaker-us-east-2-946277762357/LLM-RAG/workshop/LLM_chatglm2_stream_deploy_code/model.tar.gz\n"
     ]
    }
   ],
   "source": [
    "s3_code_artifact = sess.upload_data(\"model.tar.gz\", bucket, s3_code_prefix)\n",
    "print(f\"S3 Code or Model tar ball uploaded to --- > {s3_code_artifact}\")"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "a5853daa-b8a3-4485-8c0a-64bf83e93a18",
   "metadata": {},
   "source": [
    "### 4. 创建模型 & 创建endpoint"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "id": "ef974ca1-9638-45a8-9145-ea9d03b2b072",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "chatglm-stream-2023-08-09-06-45-35-544\n",
      "Image going to be used is ---- > 763104351884.dkr.ecr.us-east-2.amazonaws.com/djl-inference:0.22.1-deepspeed0.9.2-cu118\n",
      "Created Model: arn:aws:sagemaker:us-east-2:946277762357:model/chatglm-stream-2023-08-09-06-45-35-544\n"
     ]
    }
   ],
   "source": [
    "from sagemaker.utils import name_from_base\n",
    "import boto3\n",
    "\n",
    "model_name = name_from_base(f\"chatglm-stream\") # Append a timestamp to the provided string\n",
    "print(model_name)\n",
    "print(f\"Image going to be used is ---- > {inference_image_uri}\")\n",
    "\n",
    "create_model_response = sm_client.create_model(\n",
    "    ModelName=model_name,\n",
    "    ExecutionRoleArn=role,\n",
    "    PrimaryContainer={\n",
    "        \"Image\": inference_image_uri,\n",
    "        \"ModelDataUrl\": s3_code_artifact\n",
    "    },\n",
    "    \n",
    ")\n",
    "model_arn = create_model_response[\"ModelArn\"]\n",
    "\n",
    "print(f\"Created Model: {model_arn}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "id": "233bb3a4-d737-41ad-8fcc-7082c6278e8c",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'EndpointConfigArn': 'arn:aws:sagemaker:us-east-2:946277762357:endpoint-config/chatglm-stream-2023-08-09-06-45-35-544-config',\n",
       " 'ResponseMetadata': {'RequestId': '1373a433-4fc1-4d6d-84f1-786c1e792b79',\n",
       "  'HTTPStatusCode': 200,\n",
       "  'HTTPHeaders': {'x-amzn-requestid': '1373a433-4fc1-4d6d-84f1-786c1e792b79',\n",
       "   'content-type': 'application/x-amz-json-1.1',\n",
       "   'content-length': '126',\n",
       "   'date': 'Wed, 09 Aug 2023 06:45:37 GMT'},\n",
       "  'RetryAttempts': 0}}"
      ]
     },
     "execution_count": 42,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "endpoint_config_name = f\"{model_name}-config\"\n",
    "endpoint_name = f\"{model_name}-endpoint\"\n",
    "\n",
    "#Note: ml.g4dn.2xlarge 也可以选择\n",
    "endpoint_config_response = sm_client.create_endpoint_config(\n",
    "    EndpointConfigName=endpoint_config_name,\n",
    "    ProductionVariants=[\n",
    "        {\n",
    "            \"VariantName\": \"variant1\",\n",
    "            \"ModelName\": model_name,\n",
    "            \"InstanceType\": \"ml.g4dn.2xlarge\",\n",
    "            \"InitialInstanceCount\": 1,\n",
    "            # \"VolumeSizeInGB\" : 400,\n",
    "            # \"ModelDataDownloadTimeoutInSeconds\": 2400,\n",
    "            \"ContainerStartupHealthCheckTimeoutInSeconds\": 10*60,\n",
    "        },\n",
    "    ],\n",
    ")\n",
    "endpoint_config_response"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "id": "734a39b0-473e-4421-94c8-74d2b4105038",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Created Endpoint: arn:aws:sagemaker:us-east-2:946277762357:endpoint/chatglm-stream-2023-08-09-06-45-35-544-endpoint\n"
     ]
    }
   ],
   "source": [
    "create_endpoint_response = sm_client.create_endpoint(\n",
    "    EndpointName=f\"{endpoint_name}\", EndpointConfigName=endpoint_config_name\n",
    ")\n",
    "print(f\"Created Endpoint: {create_endpoint_response['EndpointArn']}\")"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "1262e826-a810-401d-a5a9-f62febb24e5f",
   "metadata": {},
   "source": [
    "#### 持续检测模型部署进度"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "08969928-6b9e-4d9c-a033-a31f5f77bdfb",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Status: Creating\n"
     ]
    }
   ],
   "source": [
    "import time\n",
    "\n",
    "resp = sm_client.describe_endpoint(EndpointName=endpoint_name)\n",
    "status = resp[\"EndpointStatus\"]\n",
    "print(\"Status: \" + status)\n",
    "\n",
    "while status == \"Creating\":\n",
    "    time.sleep(60)\n",
    "    resp = sm_client.describe_endpoint(EndpointName=endpoint_name)\n",
    "    status = resp[\"EndpointStatus\"]\n",
    "    print(\"Status: \" + status)\n",
    "\n",
    "print(\"Arn: \" + resp[\"EndpointArn\"])\n",
    "print(\"Status: \" + status)"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "d985b427-3959-46f7-9a50-5a2b45e2d513",
   "metadata": {},
   "source": [
    "### 5. 模型测试"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "id": "e56bfdaa-3469-4784-aa8a-e32177cde3f2",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 4.41 ms, sys: 0 ns, total: 4.41 ms\n",
      "Wall time: 4.29 ms\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "import json\n",
    "import boto3\n",
    "\n",
    "smr_client = boto3.client(\"sagemaker-runtime\")\n",
    "\n",
    "parameters = {\n",
    "  \"max_length\": 2048,\n",
    "  \"temperature\": 0.01,\n",
    "  \"top_p\":0.8\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "id": "3a2b6b7b-2e54-4f83-9258-cfbb30cfc829",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "import io\n",
    "\n",
    "\n",
    "class StreamScanner:\n",
    "    \"\"\"\n",
    "    A helper class for parsing the InvokeEndpointWithResponseStream event stream. \n",
    "    \n",
    "    The output of the model will be in the following format:\n",
    "    ```\n",
    "    b'{\"outputs\": [\" a\"]}\\n'\n",
    "    b'{\"outputs\": [\" challenging\"]}\\n'\n",
    "    b'{\"outputs\": [\" problem\"]}\\n'\n",
    "    ...\n",
    "    ```\n",
    "    \n",
    "    While usually each PayloadPart event from the event stream will contain a byte array \n",
    "    with a full json, this is not guaranteed and some of the json objects may be split across\n",
    "    PayloadPart events. For example:\n",
    "    ```\n",
    "    {'PayloadPart': {'Bytes': b'{\"outputs\": '}}\n",
    "    {'PayloadPart': {'Bytes': b'[\" problem\"]}\\n'}}\n",
    "    ```\n",
    "    \n",
    "    This class accounts for this by concatenating bytes written via the 'write' function\n",
    "    and then exposing a method which will return lines (ending with a '\\n' character) within\n",
    "    the buffer via the 'readlines' function. It maintains the position of the last read \n",
    "    position to ensure that previous bytes are not exposed again. \n",
    "    \"\"\"\n",
    "    \n",
    "    def __init__(self):\n",
    "        self.buff = io.BytesIO()\n",
    "        self.read_pos = 0\n",
    "        \n",
    "    def write(self, content):\n",
    "        self.buff.seek(0, io.SEEK_END)\n",
    "        self.buff.write(content)\n",
    "        \n",
    "    def readlines(self):\n",
    "        self.buff.seek(self.read_pos)\n",
    "        for line in self.buff.readlines():\n",
    "            if line[-1] != b'\\n':\n",
    "                self.read_pos += len(line)\n",
    "                yield line[:-1]\n",
    "                \n",
    "    def reset(self):\n",
    "        self.read_pos = 0"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "id": "e2c13afb-2c4a-41da-a58e-4d52ea13a6d9",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'outputs': {'outputs': '你', 'finished': False}}\n",
      "{'outputs': {'outputs': '好', 'finished': False}}\n",
      "{'outputs': {'outputs': '👋', 'finished': False}}\n",
      "{'outputs': {'outputs': '！', 'finished': False}}\n",
      "{'outputs': {'outputs': '我是', 'finished': False}}\n",
      "{'outputs': {'outputs': '人工智能', 'finished': False}}\n",
      "{'outputs': {'outputs': '助手', 'finished': False}}\n",
      "{'outputs': {'outputs': ' Chat', 'finished': False}}\n",
      "{'outputs': {'outputs': 'GL', 'finished': False}}\n",
      "{'outputs': {'outputs': 'M', 'finished': False}}\n",
      "{'outputs': {'outputs': '2', 'finished': False}}\n",
      "{'outputs': {'outputs': '-', 'finished': False}}\n",
      "{'outputs': {'outputs': '6', 'finished': False}}\n",
      "{'outputs': {'outputs': 'B', 'finished': False}}\n",
      "{'outputs': {'outputs': '，', 'finished': False}}\n",
      "{'outputs': {'outputs': '很高兴', 'finished': False}}\n",
      "{'outputs': {'outputs': '见到', 'finished': False}}\n",
      "{'outputs': {'outputs': '你', 'finished': False}}\n",
      "{'outputs': {'outputs': '，', 'finished': False}}\n",
      "{'outputs': {'outputs': '欢迎', 'finished': False}}\n",
      "{'outputs': {'outputs': '问我', 'finished': False}}\n",
      "{'outputs': {'outputs': '任何', 'finished': False}}\n",
      "{'outputs': {'outputs': '问题', 'finished': False}}\n",
      "{'outputs': {'outputs': '。', 'finished': False}}\n",
      "{'outputs': {'outputs': '', 'finished': False}}\n"
     ]
    }
   ],
   "source": [
    "prompts1 = \"\"\"你好\"\"\"\n",
    "# prompts1 = \"\"\"write a 500 words story about scifiction\"\"\"\n",
    "response_model = smr_client.invoke_endpoint_with_response_stream(\n",
    "            EndpointName=endpoint_name,\n",
    "            Body=json.dumps(\n",
    "            {\n",
    "                \"inputs\": prompts1,\n",
    "                \"parameters\": parameters,\n",
    "                \"history\" : []\n",
    "            }\n",
    "            ),\n",
    "            ContentType=\"application/json\",\n",
    "        )\n",
    "\n",
    "event_stream = response_model['Body']\n",
    "scanner = StreamScanner()\n",
    "for event in event_stream:\n",
    "    scanner.write(event['PayloadPart']['Bytes'])\n",
    "    for line in scanner.readlines():\n",
    "        try:\n",
    "            resp = json.loads(line)\n",
    "            print(resp)\n",
    "            # print(resp.get(\"outputs\")['outputs'], end='')\n",
    "        except Exception as e:\n",
    "            # print(line)\n",
    "            continue"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "21c8b703-e312-4964-8be9-a754468e07cd",
   "metadata": {},
   "source": [
    "#### 清除模型Endpoint和config"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "id": "f70d116f-4fb1-4f04-8732-3d6e4fb520de",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "!aws sagemaker delete-endpoint --endpoint-name  {endpoint_name}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "id": "184e4d1d-3d62-43df-9b17-5d64ece928bd",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "!aws sagemaker delete-endpoint-config --endpoint-config-name {endpoint_config_name}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "id": "6b7ae59f-caae-4719-b84f-dbc9ac36990f",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "!aws sagemaker delete-model --model-name  {model_name}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "395747d2-7d5d-41e7-b23b-a922f9f411b1",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "availableInstances": [
   {
    "_defaultOrder": 0,
    "_isFastLaunch": true,
    "category": "General purpose",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 4,
    "name": "ml.t3.medium",
    "vcpuNum": 2
   },
   {
    "_defaultOrder": 1,
    "_isFastLaunch": false,
    "category": "General purpose",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 8,
    "name": "ml.t3.large",
    "vcpuNum": 2
   },
   {
    "_defaultOrder": 2,
    "_isFastLaunch": false,
    "category": "General purpose",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 16,
    "name": "ml.t3.xlarge",
    "vcpuNum": 4
   },
   {
    "_defaultOrder": 3,
    "_isFastLaunch": false,
    "category": "General purpose",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 32,
    "name": "ml.t3.2xlarge",
    "vcpuNum": 8
   },
   {
    "_defaultOrder": 4,
    "_isFastLaunch": true,
    "category": "General purpose",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 8,
    "name": "ml.m5.large",
    "vcpuNum": 2
   },
   {
    "_defaultOrder": 5,
    "_isFastLaunch": false,
    "category": "General purpose",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 16,
    "name": "ml.m5.xlarge",
    "vcpuNum": 4
   },
   {
    "_defaultOrder": 6,
    "_isFastLaunch": false,
    "category": "General purpose",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 32,
    "name": "ml.m5.2xlarge",
    "vcpuNum": 8
   },
   {
    "_defaultOrder": 7,
    "_isFastLaunch": false,
    "category": "General purpose",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 64,
    "name": "ml.m5.4xlarge",
    "vcpuNum": 16
   },
   {
    "_defaultOrder": 8,
    "_isFastLaunch": false,
    "category": "General purpose",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 128,
    "name": "ml.m5.8xlarge",
    "vcpuNum": 32
   },
   {
    "_defaultOrder": 9,
    "_isFastLaunch": false,
    "category": "General purpose",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 192,
    "name": "ml.m5.12xlarge",
    "vcpuNum": 48
   },
   {
    "_defaultOrder": 10,
    "_isFastLaunch": false,
    "category": "General purpose",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 256,
    "name": "ml.m5.16xlarge",
    "vcpuNum": 64
   },
   {
    "_defaultOrder": 11,
    "_isFastLaunch": false,
    "category": "General purpose",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 384,
    "name": "ml.m5.24xlarge",
    "vcpuNum": 96
   },
   {
    "_defaultOrder": 12,
    "_isFastLaunch": false,
    "category": "General purpose",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 8,
    "name": "ml.m5d.large",
    "vcpuNum": 2
   },
   {
    "_defaultOrder": 13,
    "_isFastLaunch": false,
    "category": "General purpose",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 16,
    "name": "ml.m5d.xlarge",
    "vcpuNum": 4
   },
   {
    "_defaultOrder": 14,
    "_isFastLaunch": false,
    "category": "General purpose",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 32,
    "name": "ml.m5d.2xlarge",
    "vcpuNum": 8
   },
   {
    "_defaultOrder": 15,
    "_isFastLaunch": false,
    "category": "General purpose",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 64,
    "name": "ml.m5d.4xlarge",
    "vcpuNum": 16
   },
   {
    "_defaultOrder": 16,
    "_isFastLaunch": false,
    "category": "General purpose",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 128,
    "name": "ml.m5d.8xlarge",
    "vcpuNum": 32
   },
   {
    "_defaultOrder": 17,
    "_isFastLaunch": false,
    "category": "General purpose",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 192,
    "name": "ml.m5d.12xlarge",
    "vcpuNum": 48
   },
   {
    "_defaultOrder": 18,
    "_isFastLaunch": false,
    "category": "General purpose",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 256,
    "name": "ml.m5d.16xlarge",
    "vcpuNum": 64
   },
   {
    "_defaultOrder": 19,
    "_isFastLaunch": false,
    "category": "General purpose",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 384,
    "name": "ml.m5d.24xlarge",
    "vcpuNum": 96
   },
   {
    "_defaultOrder": 20,
    "_isFastLaunch": false,
    "category": "General purpose",
    "gpuNum": 0,
    "hideHardwareSpecs": true,
    "memoryGiB": 0,
    "name": "ml.geospatial.interactive",
    "supportedImageNames": [
     "sagemaker-geospatial-v1-0"
    ],
    "vcpuNum": 0
   },
   {
    "_defaultOrder": 21,
    "_isFastLaunch": true,
    "category": "Compute optimized",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 4,
    "name": "ml.c5.large",
    "vcpuNum": 2
   },
   {
    "_defaultOrder": 22,
    "_isFastLaunch": false,
    "category": "Compute optimized",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 8,
    "name": "ml.c5.xlarge",
    "vcpuNum": 4
   },
   {
    "_defaultOrder": 23,
    "_isFastLaunch": false,
    "category": "Compute optimized",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 16,
    "name": "ml.c5.2xlarge",
    "vcpuNum": 8
   },
   {
    "_defaultOrder": 24,
    "_isFastLaunch": false,
    "category": "Compute optimized",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 32,
    "name": "ml.c5.4xlarge",
    "vcpuNum": 16
   },
   {
    "_defaultOrder": 25,
    "_isFastLaunch": false,
    "category": "Compute optimized",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 72,
    "name": "ml.c5.9xlarge",
    "vcpuNum": 36
   },
   {
    "_defaultOrder": 26,
    "_isFastLaunch": false,
    "category": "Compute optimized",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 96,
    "name": "ml.c5.12xlarge",
    "vcpuNum": 48
   },
   {
    "_defaultOrder": 27,
    "_isFastLaunch": false,
    "category": "Compute optimized",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 144,
    "name": "ml.c5.18xlarge",
    "vcpuNum": 72
   },
   {
    "_defaultOrder": 28,
    "_isFastLaunch": false,
    "category": "Compute optimized",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 192,
    "name": "ml.c5.24xlarge",
    "vcpuNum": 96
   },
   {
    "_defaultOrder": 29,
    "_isFastLaunch": true,
    "category": "Accelerated computing",
    "gpuNum": 1,
    "hideHardwareSpecs": false,
    "memoryGiB": 16,
    "name": "ml.g4dn.xlarge",
    "vcpuNum": 4
   },
   {
    "_defaultOrder": 30,
    "_isFastLaunch": false,
    "category": "Accelerated computing",
    "gpuNum": 1,
    "hideHardwareSpecs": false,
    "memoryGiB": 32,
    "name": "ml.g4dn.2xlarge",
    "vcpuNum": 8
   },
   {
    "_defaultOrder": 31,
    "_isFastLaunch": false,
    "category": "Accelerated computing",
    "gpuNum": 1,
    "hideHardwareSpecs": false,
    "memoryGiB": 64,
    "name": "ml.g4dn.4xlarge",
    "vcpuNum": 16
   },
   {
    "_defaultOrder": 32,
    "_isFastLaunch": false,
    "category": "Accelerated computing",
    "gpuNum": 1,
    "hideHardwareSpecs": false,
    "memoryGiB": 128,
    "name": "ml.g4dn.8xlarge",
    "vcpuNum": 32
   },
   {
    "_defaultOrder": 33,
    "_isFastLaunch": false,
    "category": "Accelerated computing",
    "gpuNum": 4,
    "hideHardwareSpecs": false,
    "memoryGiB": 192,
    "name": "ml.g4dn.12xlarge",
    "vcpuNum": 48
   },
   {
    "_defaultOrder": 34,
    "_isFastLaunch": false,
    "category": "Accelerated computing",
    "gpuNum": 1,
    "hideHardwareSpecs": false,
    "memoryGiB": 256,
    "name": "ml.g4dn.16xlarge",
    "vcpuNum": 64
   },
   {
    "_defaultOrder": 35,
    "_isFastLaunch": false,
    "category": "Accelerated computing",
    "gpuNum": 1,
    "hideHardwareSpecs": false,
    "memoryGiB": 61,
    "name": "ml.p3.2xlarge",
    "vcpuNum": 8
   },
   {
    "_defaultOrder": 36,
    "_isFastLaunch": false,
    "category": "Accelerated computing",
    "gpuNum": 4,
    "hideHardwareSpecs": false,
    "memoryGiB": 244,
    "name": "ml.p3.8xlarge",
    "vcpuNum": 32
   },
   {
    "_defaultOrder": 37,
    "_isFastLaunch": false,
    "category": "Accelerated computing",
    "gpuNum": 8,
    "hideHardwareSpecs": false,
    "memoryGiB": 488,
    "name": "ml.p3.16xlarge",
    "vcpuNum": 64
   },
   {
    "_defaultOrder": 38,
    "_isFastLaunch": false,
    "category": "Accelerated computing",
    "gpuNum": 8,
    "hideHardwareSpecs": false,
    "memoryGiB": 768,
    "name": "ml.p3dn.24xlarge",
    "vcpuNum": 96
   },
   {
    "_defaultOrder": 39,
    "_isFastLaunch": false,
    "category": "Memory Optimized",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 16,
    "name": "ml.r5.large",
    "vcpuNum": 2
   },
   {
    "_defaultOrder": 40,
    "_isFastLaunch": false,
    "category": "Memory Optimized",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 32,
    "name": "ml.r5.xlarge",
    "vcpuNum": 4
   },
   {
    "_defaultOrder": 41,
    "_isFastLaunch": false,
    "category": "Memory Optimized",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 64,
    "name": "ml.r5.2xlarge",
    "vcpuNum": 8
   },
   {
    "_defaultOrder": 42,
    "_isFastLaunch": false,
    "category": "Memory Optimized",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 128,
    "name": "ml.r5.4xlarge",
    "vcpuNum": 16
   },
   {
    "_defaultOrder": 43,
    "_isFastLaunch": false,
    "category": "Memory Optimized",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 256,
    "name": "ml.r5.8xlarge",
    "vcpuNum": 32
   },
   {
    "_defaultOrder": 44,
    "_isFastLaunch": false,
    "category": "Memory Optimized",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 384,
    "name": "ml.r5.12xlarge",
    "vcpuNum": 48
   },
   {
    "_defaultOrder": 45,
    "_isFastLaunch": false,
    "category": "Memory Optimized",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 512,
    "name": "ml.r5.16xlarge",
    "vcpuNum": 64
   },
   {
    "_defaultOrder": 46,
    "_isFastLaunch": false,
    "category": "Memory Optimized",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 768,
    "name": "ml.r5.24xlarge",
    "vcpuNum": 96
   },
   {
    "_defaultOrder": 47,
    "_isFastLaunch": false,
    "category": "Accelerated computing",
    "gpuNum": 1,
    "hideHardwareSpecs": false,
    "memoryGiB": 16,
    "name": "ml.g5.xlarge",
    "vcpuNum": 4
   },
   {
    "_defaultOrder": 48,
    "_isFastLaunch": false,
    "category": "Accelerated computing",
    "gpuNum": 1,
    "hideHardwareSpecs": false,
    "memoryGiB": 32,
    "name": "ml.g5.2xlarge",
    "vcpuNum": 8
   },
   {
    "_defaultOrder": 49,
    "_isFastLaunch": false,
    "category": "Accelerated computing",
    "gpuNum": 1,
    "hideHardwareSpecs": false,
    "memoryGiB": 64,
    "name": "ml.g5.4xlarge",
    "vcpuNum": 16
   },
   {
    "_defaultOrder": 50,
    "_isFastLaunch": false,
    "category": "Accelerated computing",
    "gpuNum": 1,
    "hideHardwareSpecs": false,
    "memoryGiB": 128,
    "name": "ml.g5.8xlarge",
    "vcpuNum": 32
   },
   {
    "_defaultOrder": 51,
    "_isFastLaunch": false,
    "category": "Accelerated computing",
    "gpuNum": 1,
    "hideHardwareSpecs": false,
    "memoryGiB": 256,
    "name": "ml.g5.16xlarge",
    "vcpuNum": 64
   },
   {
    "_defaultOrder": 52,
    "_isFastLaunch": false,
    "category": "Accelerated computing",
    "gpuNum": 4,
    "hideHardwareSpecs": false,
    "memoryGiB": 192,
    "name": "ml.g5.12xlarge",
    "vcpuNum": 48
   },
   {
    "_defaultOrder": 53,
    "_isFastLaunch": false,
    "category": "Accelerated computing",
    "gpuNum": 4,
    "hideHardwareSpecs": false,
    "memoryGiB": 384,
    "name": "ml.g5.24xlarge",
    "vcpuNum": 96
   },
   {
    "_defaultOrder": 54,
    "_isFastLaunch": false,
    "category": "Accelerated computing",
    "gpuNum": 8,
    "hideHardwareSpecs": false,
    "memoryGiB": 768,
    "name": "ml.g5.48xlarge",
    "vcpuNum": 192
   },
   {
    "_defaultOrder": 55,
    "_isFastLaunch": false,
    "category": "Accelerated computing",
    "gpuNum": 8,
    "hideHardwareSpecs": false,
    "memoryGiB": 1152,
    "name": "ml.p4d.24xlarge",
    "vcpuNum": 96
   },
   {
    "_defaultOrder": 56,
    "_isFastLaunch": false,
    "category": "Accelerated computing",
    "gpuNum": 8,
    "hideHardwareSpecs": false,
    "memoryGiB": 1152,
    "name": "ml.p4de.24xlarge",
    "vcpuNum": 96
   }
  ],
  "instance_type": "ml.t3.medium",
  "kernelspec": {
   "display_name": "Python 3 (Data Science)",
   "language": "python",
   "name": "python3__SAGEMAKER_INTERNAL__arn:aws:sagemaker:us-east-2:429704687514:image/datascience-1.0"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
